package us.codecraft.webmagic.pipeline;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;

import com.mongodb.BasicDBObject;
import com.mongodb.DB;
import com.mongodb.DBCollection;
import com.mongodb.Mongo;

/**
 * Command-line utility that imports every regular file of a directory into
 * MongoDB.
 *
 * <p>Each file is parsed as HTML with jsoup, converted to a
 * {@link BasicDBObject} through {@code HTMLToJSON#ElementsToJSON()}, tagged
 * with the file name ({@code "url"}) and the current time in milliseconds
 * ({@code "time"}), inserted into collection {@code jtest} of database
 * {@code test}, and then deleted from disk.
 */
public class SavePageToMongo {
	/**
	 * Imports all regular files found directly inside the given directory.
	 * Sub-directories are skipped.
	 *
	 * @param args {@code args[0]} is the path of the directory to import
	 * @throws IllegalArgumentException if no directory argument is supplied
	 * @throws IOException if the path cannot be listed as a directory or a
	 *         file cannot be read or parsed
	 */
	public static void main(String[] args) throws IOException {
		if (args.length < 1) {
			throw new IllegalArgumentException("Usage: SavePageToMongo <workingDir>");
		}

		File dir = new File(args[0]);
		File[] files = dir.listFiles();
		// listFiles() returns null (not an empty array) when the path is not
		// a directory or cannot be read.
		if (files == null) {
			throw new IOException("Not a readable directory: " + dir);
		}

		// One client for the whole run instead of one per file; it is always
		// closed, even when an import fails half-way through.
		Mongo mc = new Mongo();
		try {
			DB db = mc.getDB("test");
			DBCollection coll = db.getCollection("jtest");

			for (File f : files) {
				if (f.isFile()) {
					importFile(f, coll);
				}
			}
		} finally {
			mc.close();
		}
	}

	/**
	 * Parses one HTML file, stores it in the collection and removes it from disk.
	 *
	 * @param f    the HTML file to import; its name is stored as {@code "url"}
	 * @param coll the target collection
	 * @throws IOException if the file cannot be read or parsed
	 */
	private static void importFile(File f, DBCollection coll) throws IOException {
		Document doc = Jsoup.parse(f, "utf8");
		long timestamp = System.currentTimeMillis();

		BasicDBObject mongoDoc = new HTMLToJSON(doc).ElementsToJSON();
		mongoDoc.append("url", f.getName());
		mongoDoc.append("time", timestamp);

		coll.insert(mongoDoc);

		// The file is only removed after a successful insert; a failed delete
		// is reported instead of being silently ignored.
		if (!f.delete()) {
			System.err.println("Could not delete " + f);
		}
	}

}


